

congs <- 101:113

load("all_data.Rda")
load("legDataTmp.Rda")
load("speeches.Rda")
load("y.Rda")
load(paste0(getwd(), "/EMestimates.Rda"))


# Calculate the number of cores
no_cores <- min(detectCores() - 1, 10)


# define matrix inverse function
matrix_inverse <- function(X) chol2inv(chol(X))

RNGkind("L'Ecuyer-CMRG")
set.seed(1234)

# do the sampling up front
placebo_sampling <- NULL
for (n in 1:100){
  placebo_sampling[[n]] <- 
    all_data %>% 
    dplyr::group_by(pid, cong) %>%
    dplyr::reframe(d = sample(unique(rollcall), size = round(0.05*length(unique(rollcall))), replace = FALSE)) %>%
    dplyr::ungroup() %>%
    dplyr::rename(congress = cong)
}

placebo_maker <- function(placebo) {
  
  all_data_placebo <- NULL
  for (cong in congs){
    
    load(paste0(getwd(), "/hou",cong,"kh.ord",sep=""))
    
    #pick out roll calls
    y <- dataset$votes[-1,]
    
    #replace anything but 1 and 6 with missing
    ynew <- matrix(NA, nrow(y), ncol(y))
    ynew[y %in% c(4,5,6)] <- 0
    ynew[y %in% c(1,2,3)] <- 1
    
    y <- ynew
    rownames(y) <- dataset$legis.data$icpsrLegis[-1]
    colnames(y) <- paste(cong,"-",1:ncol(y),sep="")
    
    #now, we need to remove near-unanimous roll calls
    dropthese <- sapply(1:ncol(y), function(i) sum(prop.table(table(y[,i]))>0.975)>0)
    dropthese <- (1:ncol(y))[dropthese==TRUE]
    y <- y[,-dropthese]
    
    
    #now, we create 'placebo' party loyalty estimates
    #first we identify the relevant leaders
    
    
    if(cong < 104) rep_leader <- "Michel"
    if(cong>=104 & cong <=105) rep_leader <- c("Gingrich", "Armey")
    if(cong>=106 & cong <=108) rep_leader <- c("Hastert", "Armey")
    if(cong == 108) rep_leader <- c("Hastert", "Delay")
    if(cong == 109) rep_leader <- c("Hastert", "Boehner")
    if(cong>=110 & cong<=111) rep_leader <- "Boehner"
    if(cong > 111) rep_leader <- c("Boehner", "Cantor")
    
    if(cong < 104) dem_leader <- c("Foley", "Gephardt", "Wright")
    if(cong>=104 & cong <=107) dem_leader <- "Gephardt"
    if(cong>=108 & cong<=109) dem_leader <- "Pelosi"
    if(cong>=110 & cong<=111) dem_leader <- c("Pelosi","Hoyer")
    if(cong > 111) dem_leader <- c("Pelosi")
    
    
    r_icpsr <- dataset$legis.data[grep(paste(tolower(rep_leader),collapse="|"),tolower(rownames(dataset$legis.data))),]$icpsrLegis
    d_icpsr <- dataset$legis.data[grep(paste(tolower(dem_leader),collapse="|"),tolower(rownames(dataset$legis.data))),]$icpsrLegis
    
    y_r_leader <- y[as.character(r_icpsr),]
    y_d_leader <- y[as.character(d_icpsr),]
    
    
    ##now we are going to randomly choose 5% of roll calls
    ##and pretend the leader's vote is a speech
    r_rc_samp <- placebo_sampling[[placebo]] %>% dplyr::filter(congress == cong, pid == 2) %>% dplyr::select(d) %>% .[[1]] %>% as.character()
    d_rc_samp <- placebo_sampling[[placebo]] %>% dplyr::filter(congress == cong, pid == 1) %>% dplyr::select(d) %>% .[[1]] %>% as.character()
    
    rep_speech_rev <- rep(0,ncol(y))
    if (length(r_icpsr)==1){
      names(rep_speech_rev) <- names(y_r_leader)
      rep_speech_rev[r_rc_samp] <- 2*y_r_leader[r_rc_samp]-1
    }else{
      names(rep_speech_rev) <- dimnames(y_r_leader)[[2]]
      rep_speech_rev[r_rc_samp] <- apply(2*y_r_leader[,r_rc_samp]-1, 2, median, na.rm=TRUE)
    }
    
    dem_speech_rev <- rep(0,ncol(y))
    if (length(d_icpsr)==1){
      names(dem_speech_rev) <- names(y_d_leader)
      dem_speech_rev[d_rc_samp] <- 2*y_d_leader[d_rc_samp]-1
    }else{
      names(dem_speech_rev) <- dimnames(y_d_leader)[[2]]
      dem_speech_rev[d_rc_samp] <- apply(2*y_d_leader[,d_rc_samp]-1, 2, median, na.rm=TRUE)
    }
    
    #fix the missing data
    dem_speech_rev[is.na(dem_speech_rev)] <- 0
    rep_speech_rev[is.na(rep_speech_rev)] <- 0
    
    
    #recode party
    party <- dataset$legis.data$partyCode[-1]/100
    y <- y[which(party<=2),]
    party <- party[which(party<=2)]
    
    #make speech matrix
    speech <- rbind(dem_speech_rev,rep_speech_rev)
    colnames(speech) <- colnames(y)
    
    N <- nrow(y)
    J <- ncol(y)
    
    
    M <- length(y)
    ii <- rep(1:N, times=J)
    jj <- rep(1:J, each=N)
    pid <- c(matrix(rep(party,J), N, J))
    republican <- (party==2)*1
    democrat <- (party==1)*1
    Zlong <- cbind(democrat[ii]*dem_speech_rev[jj], republican[ii]*rep_speech_rev[jj])
    
    all_data_placebo_tmp <- data.frame(cong=cong, id=rep(rownames(y),times=J), rollcall=rep(colnames(y),each=N),
                                       pid=pid, leader_speech=democrat[ii]*dem_speech_rev[jj]+republican[ii]*rep_speech_rev[jj],
                                       vote=c(y), name=rep(rownames(dataset$legis.data)[which(party<=2)],times=J))
    
    all_data_placebo <- rbind(all_data_placebo,all_data_placebo_tmp)
  }
  
  
  #here we go
  y <- spread(all_data_placebo[,c(2,3,6)], key = rollcall, value=vote)
  rownames(y) <- y[,1]
  y <- y[,-1]
  speeches <- spread(all_data_placebo[,c(2,3,5)], key = rollcall, value=leader_speech)
  speeches <- speeches[,-1]
  speeches <- as.matrix(speeches)
  speeches[is.na(speeches)] <- 0
  
  ##Set things up
  N <- nrow(y)
  J <- ncol(y)
  C <- length(congs)
  
  ##Initial values using Heckman-Snyder
  
  y <- as.matrix(y)
  lower <- dataset$legis.data$icpsrLegis[grep("PELOSI",rownames(dataset$votes))]
  upper <- dataset$legis.data$icpsrLegis[grep("SENSENBR",rownames(dataset$votes))]
  lower <- which(rownames(y)==lower)
  upper <- which(rownames(y)==upper)
  
  x.current <- results$x.current
  al.current <- results$al.current
  be.current <- results$be.current
  de.current <- matrix(0,N,C)
  
  #we need to make a vector of time periods
  times <- as.numeric(gsub("-.*","",colnames(y)))-min(as.numeric(gsub("-.*","",colnames(y))))+1
  TimeMat <- matrix(0,J,C)
  for (j in 1:nrow(TimeMat)) TimeMat[j,times[j]] <- 1
  
  #hyperprior parameters
  b0 <- rep(0,2)
  B0 <- 25*diag(2)
  t0 <- 0
  T0 <- 1
  d0 <- rep(0,length(unique(all_data_placebo$cong)))
  D0 <- rep(3,length(unique(all_data_placebo$cong)))
  dd0 <- matrix(c(t0,d0))
  DD0 <- diag(c(T0,D0))
  
  
  # #definte the speech-time matrix
  legTimeMatrix <- vector("list", N)
  for (i in 1:nrow(y)) legTimeMatrix[[i]] <- matrix(rep(speeches[i,],each=C),J,C,byrow=TRUE)*TimeMat
  
  #define the update functions
  
  ab.update_chol <- function(j){
    matrix_inverse(crossprod(Xstar) + matrix_inverse(B0))%*%(crossprod(Xstar,Yhat[,j]) + matrix_inverse(B0)%*%b0)
  }
  
  
  xd.update_chol <- function(i){
    Xstar <- cbind(be.update,legTimeMatrix[[i]])
    matrix_inverse(crossprod(Xstar) + matrix_inverse(DD0))%*%(crossprod(Xstar,Yhat[i,]) + matrix_inverse(DD0)%*%dd0)
  }
  
  
  tol <- 0
  count <- 0
  deviance <- 100
  llik <- NULL
  
  ##create a bunch of matrix holders
  mu <- matrix(NA, N, J)
  dmu <- matrix(NA, N, J)
  pmu <- matrix(NA, N, J)
  
  while (tol<1-1e-3){
    
    iter.start <- proc.time()[3]
    
    ##create the NxJ matrix of legislator loyalty times speeches
    d_speech_mat <- as.matrix(de.current[,times]*speeches)
    
    ##calculate the matrix of means
    mu <- -matrix(rep(al.current,N),N,J,byrow=TRUE) +
      outer(x.current,be.current) +
      d_speech_mat
    
    #a minor correction for outliers
    mu[mu < -8] <- -8
    mu[mu > 8] <- 8
    
    #set the value of ystar for this iteration
    dmu <- dnorm(-mu)
    pmu <- pnorm(-mu)
    ystar <- (mu+dmu/(1-pmu))*(y==1) + (mu-dmu/(pmu))*(y==0)
    ystar[is.na(y)] <- mu[is.na(y)]
    
    ##set up variables and export for parallel estimation of roll call parameters
    Yhat <- ystar-d_speech_mat
    Xstar <- cbind(-1,x.current)
    
    ##estimate roll call parameters
    tmpRC <- lapply(1:J, ab.update_chol)
    tmpRC <- do.call(cbind,tmpRC)
    
    al.update <- tmpRC[1,]
    be.update <- tmpRC[2,]
    
    ##set up variables and export for parallel estimation of legislator parameters
    Yhat <- ystar + matrix(rep(al.current,N),N,J,byrow=TRUE)
    
    # Initiate cluster
    tmpLegis <- lapply(1:N, xd.update_chol)
    tmpLegis <- do.call(cbind,tmpLegis)
    
    x.update <- tmpLegis[1,]
    de.update <- t(tmpLegis[2:nrow(tmpLegis),])
    
    
    ##make sure orientation of x-axis is correct
    if (x.update[lower]>x.update[upper]){
      x.update <- -x.update
      be.update <- -be.update
    }
    
    
    ##calculate correlation-based model tolerance
    if (count>0) tol <- min(c(cor(x.update,x.current), cor(al.update,al.current),
                              cor(be.update,be.current), cor(c(de.update),c(de.current))))
    
    
    ##set new values as those just estimated
    x.current <- x.update
    al.current <- al.update
    be.current <- be.update
    de.current <- de.update
    
    count <- count + 1
  }
  
  gc(verbose = FALSE)
  data.frame(placebo_trial=placebo, id=rep(rownames(y),length(congs)), delta=c(de.current))
  
}


res_tmp <- lapply(1:100, placebo_maker)

results_placebo <- do.call(rbind, res_tmp)


save(results_placebo, file = paste0(getwd(), "/EMestimates_placebo.Rda"))

cat("Replication file 3 done.","\n")